/*

Balance tables for "Making the Grade"

*/
#delimit;

/*
Setup for the balance tables.

Semicolon-delimited mode is switched on here and is relied on by everything below.

Command tracing is deliberately NOT switched on. With tracing enabled every line
of every ado-file is echoed, including each replication of the ritest calls in
the sample loop, which floods the log. To debug, type "set trace on" in the
session before running this file.

balancevars  ordered list of variables that get one row each in the balance
             table. The position of a variable in this list is the row number
             stored in the results matrix, so the inline numbers must stay in
             step with the order of the list.
samples      analysis samples to loop over. The sample loop keeps observations
             with <name>_sample==1, so each name needs a matching indicator
             variable (attritted_sample is generated inside that loop).
*/

	*list of variables to compute stats for;

		local balancevars
			/*1 */here_for_endline
			/*2 */Male
			/*3 */Age

			/*4 */BL_EGRA_PCA_Index
			/*5 */BL_EGRAAnyRight
			/*6 */BL_EGRA_LN_Total
			/*7 */BL_EGRA_IS_Total
			/*8 */BL_EGRA_FW_Total
			/*9 */BL_EGRA_IW_Total
			/*10*/BL_EGRA_ORF_Total
			/*11*/BL_EGRA_RC_Total

			/*12*/BL_EGWA_PCA_Index
			/*13*/BL_EGWAAnyRight
			/*14*/BL_EGWA_AN
			/*15*/BL_EGWA_EN
			/*16*/BL_EGWA_ide
			/*17*/BL_EGWA_org
			/*18*/BL_EGWA_voi
			/*19*/BL_EGWA_wor
			/*20*/BL_EGWA_sen
			/*21*/BL_EGWA_con

			;

	*samples for which a separate balance table is produced;
	local samples baseline longitudinal attritted;
	
	
	
	

	
	/*
	Balance tables, one per analysis sample listed in the local samples.

	For each sample the loop
	  1. copies the blank Excel template to Balance_<sample>.xls
	  2. loads the pupil data and keeps observations with <sample>_sample==1
	  3. fills the matrix balance_stats with one row per variable in balancevars
	       col 1       row number of the variable
	       col 2-4     mean, sd and N over the whole sample
	       col 5-6     mean and sd where Study_Arm==0 (control)
	       col 8-9     mean and sd where Study_Arm==2 (MT program)
	       col 11-12   mean and sd where Study_Arm==1 (CCT program)
	       col 15      randomization-inference p-value on 2.Study_Arm
	       col 16      randomization-inference p-value on 1.Study_Arm
	  4. saves the numbers as Balance_<sample>.dta, converts them to formatted
	     strings and writes them into the Excel file starting at cell A4.

	NOTE(review): columns 7, 10, 13 (pctnonzero_*) and 14 (p_all) are named
	below but nothing in this loop fills them. p_all is therefore exported as
	1.000 for every row because missing p-values are recoded to 1. Columns
	17-25 are also never filled and are saved as var17-var25.

	NOTE(review): "Balance formatted.dta" does not carry the sample name, so
	each pass overwrites the previous one and only the last sample survives.
	*/

	*number of rows in the table, the same for every sample;
	local n_balancevars : word count `balancevars';

	foreach sample of local samples{;

		*start from the blank template so the export at the end only fills in cells;
		copy "${code_dir}/Balance Template.xls" "${dir}/Balance_`sample'.xls", replace;

		use "${dta_dir}/1_Pupil_Data_all.dta", clear;

		*compress to improve efficiency;
		compress _all;

		*indicator for the attritted sample, used by the keep below on the attritted pass;
		gen attritted_sample = (longitudinal_sample==0);

		gen Male = 1-Female;

		*cut down to appropriate sample;
		keep if `sample'_sample==1;

		*first balance variable is presence at endline;
		*inside the longitudinal sample it is a vector of 1s and the regression in ritest cannot use
		 a constant outcome, so a random placeholder is used there and that row is blanked out later;
		gen here_for_endline = longitudinal_sample;
		if "`sample'"=="longitudinal"{;
			replace here_for_endline = runiform();
		};

		*results matrix, one row per balance variable;
		matrix balance_stats = J(`n_balancevars',25,.);

		forvalues l=1/`n_balancevars'{;

			local var : word `l' of `balancevars';

			*work on a copy with a fixed name so the commands below are the same for every variable;
			capture drop var;
			gen var = `var';

			*Summary stats;
				*store variable number;
				matrix balance_stats[`l',1] = `l';

				*overall;
				sum var;
				matrix balance_stats[`l',2] = r(mean);
				matrix balance_stats[`l',3] = r(sd);
				matrix balance_stats[`l',4] = r(N);

				*C;
				sum var if Study_Arm ==0;
				matrix balance_stats[`l',5] = r(mean);
				matrix balance_stats[`l',6] = r(sd);

				*MT_Program;
				sum var if Study_Arm ==2;
				matrix balance_stats[`l',8] = r(mean);
				matrix balance_stats[`l',9] = r(sd);

				*CCT_Program;
				sum var if Study_Arm ==1;
				matrix balance_stats[`l',11] = r(mean);
				matrix balance_stats[`l',12] = r(sd);

			*randomization inference;
				*each comparison permutes Study_Arm within its own strata variable and
				 tests the coefficient of one arm from the same fixed-effects regression;
				local ri_pvals C_vs_FC C_vs_RC;
				foreach ri_pval of local ri_pvals{;
					preserve;
						keep Study_Arm group group_`ri_pval' var School_Code;
						local treat_est;
						if "`ri_pval'"=="C_vs_FC"{;
							local treat_est _b[2.Study_Arm];
						};
						if "`ri_pval'"=="C_vs_RC"{;
							local treat_est _b[1.Study_Arm];
						};
						ritest Study_Arm `treat_est', reps(1000) seed(100453) cluster(School_Code) strata(group_`ri_pval') noanalytics:
							areg var i.Study_Arm , a(group);
						matrix define p_`ri_pval' =r(p);
						local p_`ri_pval' = p_`ri_pval'[1,1];
					restore;
				};

				*control vs MT program;
				matrix balance_stats[`l',15] = `p_C_vs_FC';

				*control vs CCT program;
				matrix balance_stats[`l',16] = `p_C_vs_RC';

		};
		*end loop over variables;

		*save matrix as a dataset;
		preserve;

			drop _all;

			*for longitudinal sample, blank out first row since it only holds the random placeholder;
			if "`sample'"=="longitudinal"{;
				forvalues jjj = 1/16{;
					matrix balance_stats[1,`jjj'] = .;
				};
			};

			svmat balance_stats, names(var);

			*give reasonable names to the variables, in matrix column order;
				local statnames	/*1*/ varnum
								/*2*/ mean_all
								/*3*/ sd_all
								/*4*/ N_all
								/*5*/ mean_C
								/*6*/ sd_C
								/*7*/ pctnonzero_C
								/*8*/ mean_MT
								/*9*/ sd_MT
								/*10*/ pctnonzero_MT
								/*11*/ mean_CCT
								/*12*/ sd_CCT
								/*13*/ pctnonzero_CCT
								/*14*/ p_all
								/*15*/ p_MT_C
								/*16*/ p_CCT_C
								;

				local n_stats : word count `statnames';

				forvalues m=1/`n_stats'{;
					local stat : word `m' of `statnames';
					rename var`m' `stat';
				};

			*put in variable names, matched on the row number stored in column 1;
				gen variable = "";
				order variable, first;
				forvalues l=1/`n_balancevars'{;
					local var : word `l' of `balancevars';
					replace variable = "`var'" if varnum==`l';
				};

			*numeric version of the table;
			save "${dir}/Balance_`sample'.dta", replace;

			*formatted version for outputting;
			tostring mean_* sd*, format(%9.3f) replace force;

			tostring pctnonzero*, format(%9.3f) replace force;
			replace pctnonzero_C = "" if pctnonzero_C==".";
			replace pctnonzero_MT = "" if pctnonzero_MT==".";
			replace pctnonzero_CCT = "" if pctnonzero_CCT==".";

			*tack parentheses onto SDs;
			replace sd_all = "(" + sd_all + ")";
			replace sd_C = "(" + sd_C + ")";
			replace sd_MT = "(" + sd_MT + ")";
			replace sd_CCT = "(" + sd_CCT + ")";

			local pvals p_all p_MT_C p_CCT_C;
			foreach pval of local pvals{;

				*create star variable while the p-value is still numeric;
				*missing p-values compare as larger than any number and so get no stars;
				gen str3 p_stars_`pval' = "";
				replace p_stars_`pval' = "*" if `pval'<0.1;
				replace p_stars_`pval' = "**" if `pval'<0.05;
				replace p_stars_`pval' = "***" if `pval'<0.01;

				*recode missings to 1;
				recode `pval' (.=1.000);

				*convert to string;
				tostring `pval', format(%9.3f) replace force;
			};

			*add stars to the treatment-arm means;
			local arms MT CCT;
			foreach arm of local arms{;
				replace mean_`arm' = mean_`arm' + p_stars_p_`arm'_C;
			};

			*fix first row for longitudinal sample, where everyone is present at endline by construction;
			if "`sample'"=="longitudinal"{;
				replace mean_C = "1.000" if _n==1;
				replace mean_MT = "1.000" if _n==1;
				replace mean_CCT = "1.000" if _n==1;
				replace p_all = "1.000" if _n==1;
				replace p_MT_C = "1.000" if _n==1;
				replace p_CCT_C = "1.000" if _n==1;
				replace varnum=1 if _n==1;
				replace variable="here_for_endline" if _n==1;
			};

			compress _all;

			save "${dir}/Balance formatted.dta", replace;

			local sheet_out Balance;

			export excel variable mean_C mean_MT mean_CCT p_all p_MT_C p_CCT_C
				using "${dir}/Balance_`sample'.xls",
				sheet("`sheet_out'") sheetmodify cell(A4);

		restore;

	};
	*end loop over samples;

#delimit ;

/*
Predictors of attrition.

Regresses an attrition indicator (longitudinal_sample==0) on baseline
characteristics, first separately within each study arm and then in a pooled
regression with arm dummies and arm-by-predictor interactions. Standard errors
are clustered by School_Code. All four regressions are appended as columns to
"Attrition Predictors <date>.xls" through outreg2.

The if qualifier is written before the comma. The earlier version put it
after the options (", cluster(School_Code), if Study_Arm==0"), which is not
the documented regress syntax.
*/

use "${dta_dir}/1_Pupil_Data_all.dta", clear;

*start from a clean slate because every outreg2 call below appends;
capture erase "${dir}/Attrition Predictors ${S_DATE}.txt";
capture erase "${dir}/Attrition Predictors ${S_DATE}.xls";

*test predictors of presence at endline;
	*an alternative list that also included BL_OE_PCA_Index was considered
	 (Female Age BL_EGRA_PCA_Index BL_EGWA_PCA_Index BL_OE_PCA_Index);
	*just use same predictors as in Appendix Table A2;
	*the author notes that the choice does not matter for results;
	gen attritted = (longitudinal_sample==0);
	local predictors Female Age BL_EGRA_PCA_Index BL_EGWA_PCA_Index;

	*one regression per study arm, in the order 0 then 1 then 2, one output column each;
	foreach arm in 0 1 2{;
		reg attritted `predictors' if Study_Arm==`arm', cluster(School_Code);
		outreg2 `predictors'
			using "${dir}/Attrition Predictors ${S_DATE}.xls",
			adjr2 dec(3) fmt(fc) nocons excel keep(`predictors')
			append;
	};

	*pooled regression: each predictor, its interaction with both program dummies, then the dummies;
	local interactions;
	foreach predictor of local predictors{;
		gen `predictor'_X_MT = `predictor'*MT_Program;
		gen `predictor'_X_CCT = `predictor'*CCT_Program;
		local interactions `interactions' `predictor' `predictor'_X_MT `predictor'_X_CCT;
	};
	local interactions `interactions' MT_Program CCT_Program;
	reg attritted `interactions', cluster(School_Code);
	outreg2 `interactions'
		using "${dir}/Attrition Predictors ${S_DATE}.xls",
		adjr2 dec(3) fmt(fc) nocons excel keep(`interactions')
		sortvar(`interactions') append;

*outreg2 leaves a .txt companion next to the .xls and only the .xls is wanted;
capture erase "${dir}/Attrition Predictors ${S_DATE}.txt";

	